From: Keir Fraser Date: Mon, 5 Jan 2009 10:45:48 +0000 (+0000) Subject: PoD memory 7/9: Xen interface X-Git-Tag: archive/raspbian/4.8.0-1+rpi1~1^2~14019^2~20 X-Git-Url: https://dgit.raspbian.org/%22http:/www.example.com/cgi/%22https:/%22bookmarks://%22Dat/%22http:/www.example.com/cgi/%22https:/%22bookmarks:/%22Dat?a=commitdiff_plain;h=36e30441326aa951ced7b1890d654d715db3bd3a;p=xen.git PoD memory 7/9: Xen interface Implement Xen interface to PoD functionality. * Increase the number of MEMOP bits from 4 to 6 (increasing the number of available memory operations from 16 to 64). * Introduce XENMEMF_populate_on_demand, which will cause populate_physmap() to fill a range with PoD entries rather than backing it with ram * Introduce XENMEM_[sg]et_pod_target operation to the memory hypercall, to get and set PoD cache size. set_pod_target() should be called during domain creation, as well as after modifying the memory target of any domain which may have outstanding PoD entries. Signed-off-by: George Dunlap --- diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c index 2adc1ed2c7..efdde83524 100644 --- a/xen/arch/x86/mm.c +++ b/xen/arch/x86/mm.c @@ -3976,6 +3976,49 @@ long arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg) return 0; } + case XENMEM_set_pod_target: + case XENMEM_get_pod_target: + { + xen_pod_target_t target; + struct domain *d; + + /* Support DOMID_SELF? 
*/
+        if ( !IS_PRIV(current->domain) )
+            return -EINVAL;
+
+        if ( copy_from_guest(&target, arg, 1) )
+            return -EFAULT;
+
+        rc = rcu_lock_target_domain_by_id(target.domid, &d);
+        if ( rc != 0 )
+            return rc;
+
+        if ( op == XENMEM_set_pod_target )
+        {
+            if ( target.target_pages > d->max_pages )
+            {
+                rc = -EINVAL;
+                goto pod_target_out_unlock;
+            }
+
+            rc = p2m_pod_set_mem_target(d, target.target_pages);
+        }
+
+        target.tot_pages = d->tot_pages;
+        target.pod_cache_pages = d->arch.p2m->pod.count;
+        target.pod_entries = d->arch.p2m->pod.entry_count;
+
+        if ( copy_to_guest(arg, &target, 1) )
+        {
+            rc= -EFAULT;
+            goto pod_target_out_unlock;
+        }
+
+    pod_target_out_unlock:
+        rcu_unlock_domain(d);
+        return rc;
+    }
+
     default:
         return subarch_memory_op(op, arg);
     }
diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index a8ed7ca7fc..b5868c7645 100644
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -387,6 +387,150 @@ static struct page_info * p2m_pod_cache_get(struct domain *d,
     return p;
 }
 
+/*
+ * Set the size of the PoD cache to pod_target pages, allocating pages from
+ * the domain heap or freeing cached pages as necessary.  Works in order-9
+ * (512-page) chunks where the remaining difference (and, when shrinking,
+ * the superpage list) allows, and in single pages otherwise.
+ * Returns 0 on success, -ENOMEM if the cache cannot be grown to the target,
+ * or -EINVAL if a cached page cannot be freed.
+ */
+static int
+p2m_pod_set_cache_target(struct domain *d, unsigned long pod_target)
+{
+    struct p2m_domain *p2md = d->arch.p2m;
+    int ret = 0;
+
+    /* Increasing the target */
+    while ( pod_target > p2md->pod.count )
+    {
+        struct page_info * page;
+        int order;
+
+        if ( (pod_target - p2md->pod.count) >= (1<<9) )
+            order = 9;
+        else
+            order = 0;
+
+        page = alloc_domheap_pages(d, order, 0);
+        if ( unlikely(page == NULL) )
+        {
+            ret = -ENOMEM;
+            goto out;
+        }
+
+        p2m_pod_cache_add(d, page, order);
+    }
+
+    /* Decreasing the target */
+    /* We hold the p2m lock here, so we don't need to worry about
+     * cache disappearing under our feet. */
+    while ( pod_target < p2md->pod.count )
+    {
+        struct page_info * page;
+        int order, i;
+
+        /* Grab the lock before checking that pod.super is empty, or the last
+         * entries may disappear before we grab the lock. */
+        spin_lock(&d->page_alloc_lock);
+
+        if ( (p2md->pod.count - pod_target) > (1<<9)
+             && !list_empty(&p2md->pod.super) )
+            order = 9;
+        else
+            order = 0;
+
+        page = p2m_pod_cache_get(d, order);
+
+        ASSERT(page != NULL);
+
+        spin_unlock(&d->page_alloc_lock);
+
+        /* Then free them */
+        for ( i = 0 ; i < (1 << order) ; i++ )
+        {
+            /* Copied from common/memory.c:guest_remove_page() */
+            if ( unlikely(!get_page(page+i, d)) )
+            {
+                gdprintk(XENLOG_INFO, "Bad page free for domain %u\n", d->domain_id);
+                ret = -EINVAL;
+                goto out;
+            }
+
+            if ( test_and_clear_bit(_PGT_pinned, &(page+i)->u.inuse.type_info) )
+                put_page_and_type(page+i);
+
+            if ( test_and_clear_bit(_PGC_allocated, &(page+i)->count_info) )
+                put_page(page+i);
+
+            put_page(page+i);
+        }
+    }
+
+out:
+    return ret;
+}
+
+/*
+ * The "right behavior" here requires some careful thought.  First, some
+ * definitions:
+ * + M: static_max
+ * + B: number of pages the balloon driver has ballooned down to.
+ * + P: Number of populated pages.
+ * + T: Old target
+ * + T': New target
+ *
+ * The following equations should hold:
+ *  0 <= P <= T <= B <= M
+ *  d->arch.p2m->pod.entry_count == B - P
+ *  d->tot_pages == P + d->arch.p2m->pod.count
+ *
+ * Now we have the following potential cases to cover:
+ *     B < T': Set the PoD cache size equal to the number of outstanding
+ *       PoD entries; the balloon driver fills in the rest.
+ *     T' < d->tot_pages: Never reduce the cache here; the balloon driver
+ *       takes care of it.  P == B: no PoD entries, nothing to do.
+ *     Otherwise: grow the cache to T' - P pages (possibly by zero pages,
+ *       e.g. when the same target is set twice).
+ *
+ * Returns 0 or the error from p2m_pod_set_cache_target().
+ * NOTE(review): the case list and the pod_target declaration were lost in
+ * extraction and are reconstructed from the function body -- confirm.
+ */
+int
+p2m_pod_set_mem_target(struct domain *d, unsigned long target)
+{
+    unsigned long pod_target;
+    struct p2m_domain *p2md = d->arch.p2m;
+    int ret = 0;
+    unsigned long populated;
+
+    /* P == B: Nothing to do. */
+    if ( p2md->pod.entry_count == 0 )
+        goto out;
+
+    /* T' < B: Don't reduce the cache size; let the balloon driver
+     * take care of it. */
+    if ( target < d->tot_pages )
+        goto out;
+
+    populated = d->tot_pages - p2md->pod.count;
+
+    pod_target = target - populated;
+
+    /* B < T': Set the cache size equal to # of outstanding entries,
+     * let the balloon driver fill in the rest. */
+    if ( pod_target > p2md->pod.entry_count )
+        pod_target = p2md->pod.entry_count;
+
+    ASSERT( pod_target >= p2md->pod.count );
+
+    ret = p2m_pod_set_cache_target(d, pod_target);
+
+out:
+    return ret;
+}
+
 void
 p2m_pod_empty_cache(struct domain *d)
 {
@@ -538,6 +682,13 @@ p2m_pod_decrease_reservation(struct domain *d,
         }
     }
 
+    /* If we've reduced our "liabilities" beyond our "assets", free some */
+    if ( p2md->pod.entry_count < p2md->pod.count )
+    {
+        printk("b %d\n", p2md->pod.entry_count);
+        p2m_pod_set_cache_target(d, p2md->pod.entry_count);
+    }
+
     /* If there are no more non-PoD entries, tell decrease_reservation() that
      * there's nothing left to do. */
     if ( nonpod == 0 )
@@ -786,7 +937,7 @@ p2m_pod_emergency_sweep_super(struct domain *d)
         /* Stop if we're past our limit and we have found *something*.
          *
          * NB that this is a zero-sum game; we're increasing our cache size
-         * by re-increasing our 'debt'.  Since we hold the p2m lock,
+         * by increasing our 'debt'.  Since we hold the p2m lock,
          * (entry_count - count) must remain the same.
*/ if ( !list_empty(&p2md->pod.super) && i < limit ) break; diff --git a/xen/arch/x86/x86_64/compat/mm.c b/xen/arch/x86/x86_64/compat/mm.c index 942c35a1e2..e14f6fab31 100644 --- a/xen/arch/x86/x86_64/compat/mm.c +++ b/xen/arch/x86/x86_64/compat/mm.c @@ -128,6 +128,29 @@ int compat_arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg) break; } + case XENMEM_set_pod_target: + case XENMEM_get_pod_target: + { + struct compat_pod_target cmp; + struct xen_pod_target *nat = (void *)COMPAT_ARG_XLAT_VIRT_BASE; + + if ( copy_from_guest(&cmp, arg, 1) ) + return -EFAULT; + + XLAT_pod_target(nat, &cmp); + + rc = arch_memory_op(op, guest_handle_from_ptr(nat, void)); + if ( rc < 0 ) + break; + + XLAT_pod_target(&cmp, nat); + + if ( copy_to_guest(arg, &cmp, 1) ) + rc = -EFAULT; + + break; + } + case XENMEM_machphys_mapping: { struct domain *d = current->domain; diff --git a/xen/common/memory.c b/xen/common/memory.c index 0d92808bd2..bf10bbbfd2 100644 --- a/xen/common/memory.c +++ b/xen/common/memory.c @@ -111,31 +111,40 @@ static void populate_physmap(struct memop_args *a) if ( unlikely(__copy_from_guest_offset(&gpfn, a->extent_list, i, 1)) ) goto out; - page = alloc_domheap_pages(d, a->extent_order, a->memflags); - if ( unlikely(page == NULL) ) + if ( a->memflags & MEMF_populate_on_demand ) { - gdprintk(XENLOG_INFO, "Could not allocate order=%d extent: " - "id=%d memflags=%x (%ld of %d)\n", - a->extent_order, d->domain_id, a->memflags, - i, a->nr_extents); - goto out; + if ( guest_physmap_mark_populate_on_demand(d, gpfn, + a->extent_order) < 0 ) + goto out; } + else + { + page = alloc_domheap_pages(d, a->extent_order, a->memflags); + if ( unlikely(page == NULL) ) + { + gdprintk(XENLOG_INFO, "Could not allocate order=%d extent: " + "id=%d memflags=%x (%ld of %d)\n", + a->extent_order, d->domain_id, a->memflags, + i, a->nr_extents); + goto out; + } - mfn = page_to_mfn(page); - guest_physmap_add_page(d, gpfn, mfn, a->extent_order); + mfn = page_to_mfn(page); + 
guest_physmap_add_page(d, gpfn, mfn, a->extent_order); - if ( !paging_mode_translate(d) ) - { - for ( j = 0; j < (1 << a->extent_order); j++ ) - set_gpfn_from_mfn(mfn + j, gpfn + j); + if ( !paging_mode_translate(d) ) + { + for ( j = 0; j < (1 << a->extent_order); j++ ) + set_gpfn_from_mfn(mfn + j, gpfn + j); - /* Inform the domain of the new page's machine address. */ - if ( unlikely(__copy_to_guest_offset(a->extent_list, i, &mfn, 1)) ) - goto out; + /* Inform the domain of the new page's machine address. */ + if ( unlikely(__copy_to_guest_offset(a->extent_list, i, &mfn, 1)) ) + goto out; + } } } - out: +out: a->nr_done = i; } @@ -527,6 +536,10 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE(void) arg) args.memflags |= MEMF_node(XENMEMF_get_node(reservation.mem_flags)); + if ( op == XENMEM_populate_physmap + && (reservation.mem_flags & XENMEMF_populate_on_demand) ) + args.memflags |= MEMF_populate_on_demand; + if ( likely(reservation.domid == DOMID_SELF) ) { d = rcu_lock_current_domain(); diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h index 2d4fd382f0..d2778a92c0 100644 --- a/xen/include/asm-x86/p2m.h +++ b/xen/include/asm-x86/p2m.h @@ -261,6 +261,10 @@ void p2m_pod_dump_data(struct domain *d); * (usually in preparation for domain destruction) */ void p2m_pod_empty_cache(struct domain *d); +/* Set populate-on-demand cache size so that the total memory allocated to a + * domain matches target */ +int p2m_pod_set_mem_target(struct domain *d, unsigned long target); + /* Call when decreasing memory reservation to handle PoD entries properly. * Will return '1' if all entries were handled and nothing more need be done.*/ int diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h index d7b9fff972..6edd59b5ef 100644 --- a/xen/include/public/memory.h +++ b/xen/include/public/memory.h @@ -48,6 +48,8 @@ /* NUMA node to allocate from. 
*/ #define XENMEMF_node(x) (((x) + 1) << 8) #define XENMEMF_get_node(x) ((((x) >> 8) - 1) & 0xffu) +/* Flag to populate physmap with populate-on-demand entries */ +#define XENMEMF_populate_on_demand (1<<16) #endif struct xen_memory_reservation { @@ -299,6 +301,19 @@ struct xen_foreign_memory_map { typedef struct xen_foreign_memory_map xen_foreign_memory_map_t; DEFINE_XEN_GUEST_HANDLE(xen_foreign_memory_map_t); +#define XENMEM_set_pod_target 16 +#define XENMEM_get_pod_target 17 +struct xen_pod_target { + /* IN: memory target for the domain, in pages; read only by XENMEM_set_pod_target, which rejects values above the domain's max_pages */ + uint64_t target_pages; + /* OUT (both ops): the domain's tot_pages, its PoD cache page count and its outstanding PoD entry count */ + uint64_t tot_pages; + uint64_t pod_cache_pages; + uint64_t pod_entries; + /* IN: id of the domain to query or modify */ + domid_t domid; +}; +typedef struct xen_pod_target xen_pod_target_t; #endif /* __XEN_PUBLIC_MEMORY_H__ */ /* diff --git a/xen/include/xen/hypercall.h b/xen/include/xen/hypercall.h index 3997b2f96a..99d2e0008a 100644 --- a/xen/include/xen/hypercall.h +++ b/xen/include/xen/hypercall.h @@ -48,7 +48,7 @@ do_platform_op( * at what point in the page list to resume. For this purpose I steal the * high-order bits of the @cmd parameter, which are otherwise unused and zero. */ -#define MEMOP_EXTENT_SHIFT 4 /* cmd[:4] == start_extent */ +#define MEMOP_EXTENT_SHIFT 6 /* cmd[:6] == start_extent */ #define MEMOP_CMD_MASK ((1 << MEMOP_EXTENT_SHIFT) - 1) extern long diff --git a/xen/include/xen/mm.h b/xen/include/xen/mm.h index 08bd72d8ce..0b78e8647e 100644 --- a/xen/include/xen/mm.h +++ b/xen/include/xen/mm.h @@ -72,6 +72,8 @@ int assign_pages( /* memflags: */ #define _MEMF_no_refcount 0 #define MEMF_no_refcount (1U<<_MEMF_no_refcount) +#define _MEMF_populate_on_demand 1 +#define MEMF_populate_on_demand (1U<<_MEMF_populate_on_demand) #define _MEMF_node 8 #define MEMF_node(n) ((((n)+1)&0xff)<<_MEMF_node) #define _MEMF_bits 24 diff --git a/xen/include/xlat.lst b/xen/include/xlat.lst index b10d215254..0bfa78f6c6 100644 --- a/xen/include/xlat.lst +++ b/xen/include/xlat.lst @@ -38,6 +38,7 @@ ! memory_exchange memory.h ! memory_map memory.h ! 
memory_reservation memory.h +! pod_target memory.h ! translate_gpfn_list memory.h ! sched_poll sched.h ? sched_remote_shutdown sched.h